# Data loading and preparation

# Load libraries
```{r}
library(rethinking)
```

# Load raw data frame
```{r}
# load() restores the objects stored in the .RData file into the current
# environment; the code further down expects this to provide `d_ext_w`.
load("./data/d_ext_w_in.RData")

## Impute wind data if needed
#' Impute NaN entries with the mean of their neighbours
#'
#' Every NaN element is replaced by the mean of the non-missing values found
#' among the `window` elements before and after it. The window is clipped at
#' both ends of the vector. A NaN that has no observed value inside its window
#' is left as NaN.
#'
#' Only NaN entries are imputed; NA entries are left in place and are ignored
#' when a mean is computed.
#'
#' @param lst A list (or vector) of numeric values. It is flattened with
#'   `unlist()` before processing.
#' @param window Number of neighbours considered on each side. Defaults to 5.
#' @return A list with one element per value of `unlist(lst)`, with the NaN
#'   entries replaced where possible.
impute_mean <- function(lst, window = 5) {
  # Convert list to numeric vector
  vec <- unlist(lst)

  # Neighbours are always read from this untouched copy. Reading them from
  # `vec` while it is being filled would let values imputed earlier in the
  # pass be averaged in as if they were real observations.
  observed <- vec
  n <- length(vec)

  for (i in which(is.nan(observed))) {
    neighbours <- observed[max(1, i - window):min(n, i + window)]
    # is.na() is TRUE for both NA and NaN, so only observed values remain
    neighbours <- neighbours[!is.na(neighbours)]

    if (length(neighbours) > 0) {
      vec[i] <- mean(neighbours)
    }
  }

  # Convert back to list
  as.list(vec)
}

# Impute the NaN values in the wind column and store it as a plain numeric
# vector. impute_mean() returns a list; unlist() flattens it directly.
# Converting through as.character() instead would round every value to at
# most 15 significant digits.
d_ext_w$W <- as.numeric(unlist(impute_mean(d_ext_w$W)))

## Remove incomplete rows
# Note: na.omit drops every row that has an NA or NaN entry in any column.
d_sol_cw_w <- na.omit(d_ext_w)

## Calculate the number of removed rows and print
removed_rows <- nrow(d_ext_w) - nrow(d_sol_cw_w)
print(paste("Number of rows removed from C_MW:", removed_rows))

## Standardize predictors for dataset with wind
# Each listed column gets a companion column named <column>_std, computed with
# standardize() from the rethinking package loaded at the top of the file.
for (col in c("T_S", "Sol", "W", "r_W", "C_MW")) {
  d_sol_cw_w[[paste0(col, "_std")]] <- standardize(d_sol_cw_w[[col]])
}

## Export the data again for processing in Python
# The object is stored under the name `d_sol_cw`, so that is the name it will
# have when the file is loaded again.
d_sol_cw <- d_sol_cw_w
save(d_sol_cw, file = "./data/d_sol_cw_w_in.RData")
```

## Run all above
```{r}

```
